Reading the PDB and trajectory files (REQ-1 and REQ-2)
## Reading the PDB and trajectory files **REQ-1** **REQ-2**
# Structure file (ref.pdb) of the medium dataset.
pdb <- read_pdb(
  "~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/ref.pdb"
)
# First trajectory; the reader prints a short summary of the file it loaded.
# NOTE(review): this file is TEST_001.nc (101 frames per the printed summary),
# whereas trj2..trj10 are read from REP_00N.nc (401 frames each) -- confirm
# that this is the intended first trajectory.
trj1 <- read_ncdf(
  "~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/TEST_001.nc"
)
> [1] "Reading file ~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/TEST_001.nc"
> [1] "Produced by program: MDTraj"
> [1] "File conventions AMBER version 1.0"
> [1] "Frames: 101"
> [1] "Atoms: 1782"
# Trajectory 2 of 10, read from REP_002.nc.
trj2 <- read_ncdf(
  "~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_002.nc"
)
> [1] "Reading file ~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_002.nc"
> [1] "Produced by program: MDTraj"
> [1] "File conventions AMBER version 1.0"
> [1] "Frames: 401"
> [1] "Atoms: 1782"
# Trajectory 3 of 10, read from REP_003.nc.
trj3 <- read_ncdf(
  "~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_003.nc"
)
> [1] "Reading file ~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_003.nc"
> [1] "Produced by program: MDTraj"
> [1] "File conventions AMBER version 1.0"
> [1] "Frames: 401"
> [1] "Atoms: 1782"
# Trajectory 4 of 10, read from REP_004.nc.
trj4 <- read_ncdf(
  "~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_004.nc"
)
> [1] "Reading file ~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_004.nc"
> [1] "Produced by program: MDTraj"
> [1] "File conventions AMBER version 1.0"
> [1] "Frames: 401"
> [1] "Atoms: 1782"
# Trajectory 5 of 10, read from REP_005.nc.
trj5 <- read_ncdf(
  "~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_005.nc"
)
> [1] "Reading file ~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_005.nc"
> [1] "Produced by program: MDTraj"
> [1] "File conventions AMBER version 1.0"
> [1] "Frames: 401"
> [1] "Atoms: 1782"
# Trajectory 6 of 10, read from REP_006.nc.
trj6 <- read_ncdf(
  "~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_006.nc"
)
> [1] "Reading file ~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_006.nc"
> [1] "Produced by program: MDTraj"
> [1] "File conventions AMBER version 1.0"
> [1] "Frames: 401"
> [1] "Atoms: 1782"
# Trajectory 7 of 10, read from REP_007.nc.
trj7 <- read_ncdf(
  "~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_007.nc"
)
> [1] "Reading file ~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_007.nc"
> [1] "Produced by program: MDTraj"
> [1] "File conventions AMBER version 1.0"
> [1] "Frames: 401"
> [1] "Atoms: 1782"
# Trajectory 8 of 10, read from REP_008.nc.
trj8 <- read_ncdf(
  "~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_008.nc"
)
> [1] "Reading file ~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_008.nc"
> [1] "Produced by program: MDTraj"
> [1] "File conventions AMBER version 1.0"
> [1] "Frames: 401"
> [1] "Atoms: 1782"
# Trajectory 9 of 10, read from REP_009.nc.
trj9 <- read_ncdf(
  "~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_009.nc"
)
> [1] "Reading file ~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_009.nc"
> [1] "Produced by program: MDTraj"
> [1] "File conventions AMBER version 1.0"
> [1] "Frames: 401"
> [1] "Atoms: 1782"
# Trajectory 10 of 10, read from REP_010.nc.
trj10 <- read_ncdf(
  "~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_010.nc"
)
> [1] "Reading file ~/Documents/Dissertation/provided files/Medium_Dataset/Medium_Dataset/REP_010.nc"
> [1] "Produced by program: MDTraj"
> [1] "File conventions AMBER version 1.0"
> [1] "Frames: 401"
> [1] "Atoms: 1782"
# Row-bind all ten trajectories, in order, into a single object
# (presumably one row per frame -- TODO confirm).
trj <- rbind(
  trj1, trj2, trj3, trj4, trj5,
  trj6, trj7, trj8, trj9, trj10
)
# Smaller object built from the first five trajectories only.
trjs2 <- rbind(trj1, trj2, trj3, trj4, trj5)
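Shell command (MDTraj `mdconvert`) for converting an `.xtc` trajectory to the `.nc` format read above: `mdconvert -o REP_010.nc REP_010.xtc`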
Concatenating the PDB and trajectory files REQ-2
# Merge the structure with the trajectories: once with all ten trajectories
# (trj) and once with the first-five subset (trjs2).
# The structure is stored in `pdb` where ref.pdb is read; `pdb1` is never
# assigned in this document, so `pdb` is the object passed here.
merged_pdb <- Merge_pdb_trj(pdb, trj)
merged_pdb2 <- Merge_pdb_trj(pdb, trjs2)
Filter the PDB and choose the Cβ (CB) atoms **REQ-2**
# Filter each merged object on the atom name "CB":
# first the full set, then the five-trajectory subset.
filtered_pdb <- filter_atoms(
  merged_pdb,
  atom = "CB"
)
filtered_pdb2 <- filter_atoms(
  merged_pdb2,
  atom = "CB"
)
Pre-aligning the frames REQ-3
# Inspect the coordinate matrix for gaps; the resulting `f.inds` indices are
# used below as both the fixed and the mobile index set.
gaps <- gap.inspect(filtered_pdb$xyz)
# Pass the first frame (row 1) together with the full coordinate matrix to the
# fitting helper (presumably every frame is fitted onto the first one --
# TODO confirm against fit_dist_frame).
prealign_pdb <- fit_dist_frame(
  filtered_pdb$xyz[1, ],
  filtered_pdb$xyz,
  fixed.inds = gaps$f.inds,
  mobile.inds = gaps$f.inds
)
distance matrix REQ-4
# Distance-matrix representation built from filtered_pdb2, i.e. the object
# derived from the first five trajectories; the call prints a progress bar.
# NOTE(review): the pre-alignment step uses filtered_pdb (all ten
# trajectories) while this step uses filtered_pdb2 -- confirm this is intended.
dm_pdb <- dist_mat_frame(filtered_pdb2)
>
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================= | 34%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
train the SOM REQ-5
## Train on the pre-aligned coordinates, using an 8 x 8 hexagonal grid.
## NOTE(review): no set.seed() call precedes the training calls in this
## document, so the trained maps may differ between runs -- confirm.
som_pre_trj <- som(
  prealign_pdb,
  grid = somgrid(xdim = 8, ydim = 8, topo = "hexagonal")
)
## Train on the distance matrix, using the same grid layout.
som_dm_trj <- som(
  as.matrix(dm_pdb),
  grid = somgrid(xdim = 8, ydim = 8, topo = "hexagonal")
)
find the optimal number of clusters REQ-6
# Evaluate candidate cluster counts on the pre-alignment SOM; the call prints
# the average silhouette score per cluster count, followed by the details.
cluster_neuron(som_pre_trj, max_cluster = 9)
> NOTE: the method used to generate distance matrix was euclidean,
> however, if other method are considered, replace kohonen object with dist object or
> dist(<som object>$codes[[1]] , <chosen method>)
> # A tibble: 4 × 2
> `Number of clusters` `silhouette score \n average`
> <int> <dbl>
> 1 6 0.394
> 2 7 0.378
> 3 8 0.391
> 4 9 0.372
> $`6`
> $`6`$cluster_number
> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
> 1 1 1 2 3 3 3 3 1 1 1 1 2 3 3 3 1 1 1 4
> V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40
> 4 3 3 3 1 5 1 4 4 4 3 3 1 5 5 4 4 4 3 3
> V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60
> 1 5 5 5 4 4 3 3 5 5 5 6 6 3 3 3 5 5 5 5
> V61 V62 V63 V64
> 6 6 6 3
>
> $`6`$silhouette
> cluster neighbor sil_width
> [1,] 1 5 0.48623506
> [2,] 1 5 0.51123564
> [3,] 1 5 0.31522289
> [4,] 2 4 0.57781934
> [5,] 3 2 0.54729735
> [6,] 3 2 0.64668117
> [7,] 3 4 0.61138302
> [8,] 3 4 0.58294651
> [9,] 1 5 0.50033636
> [10,] 1 5 0.50709923
> [11,] 1 5 0.39173709
> [12,] 1 5 0.20215358
> [13,] 2 3 0.53663642
> [14,] 3 4 0.65541309
> [15,] 3 4 0.65531093
> [16,] 3 4 0.63757547
> [17,] 1 5 0.49537316
> [18,] 1 5 0.49483937
> [19,] 1 5 0.33039037
> [20,] 4 5 0.25484456
> [21,] 4 3 0.36872990
> [22,] 3 4 0.65413450
> [23,] 3 4 0.66324604
> [24,] 3 4 0.65076226
> [25,] 1 5 0.40575964
> [26,] 5 1 -0.19542910
> [27,] 1 5 -0.14219199
> [28,] 4 5 0.50066758
> [29,] 4 5 0.63353789
> [30,] 4 6 0.54499176
> [31,] 3 4 0.53104393
> [32,] 3 4 0.60461938
> [33,] 1 5 0.22563653
> [34,] 5 1 0.34681089
> [35,] 5 4 -0.14769731
> [36,] 4 5 0.44468523
> [37,] 4 6 0.63239553
> [38,] 4 3 0.39049711
> [39,] 3 4 0.36686179
> [40,] 3 4 0.57141520
> [41,] 1 5 0.03329442
> [42,] 5 1 0.47371913
> [43,] 5 4 0.37075921
> [44,] 5 4 0.13923814
> [45,] 4 6 0.58467590
> [46,] 4 6 0.50513252
> [47,] 3 4 -0.00614571
> [48,] 3 4 0.38049725
> [49,] 5 1 0.25820039
> [50,] 5 4 0.54752717
> [51,] 5 6 0.23243686
> [52,] 6 5 0.32873244
> [53,] 6 4 0.46242425
> [54,] 3 6 -0.19513327
> [55,] 3 4 0.09815320
> [56,] 3 4 0.38982729
> [57,] 5 1 0.36491154
> [58,] 5 1 0.52827269
> [59,] 5 1 0.29035729
> [60,] 5 6 0.07996597
> [61,] 6 5 0.42876385
> [62,] 6 4 0.57681528
> [63,] 6 4 0.30203032
> [64,] 3 6 0.05615668
> attr(,"Ordered")
> [1] FALSE
> attr(,"call")
> silhouette.default(x = cluster_numbers, dist = dist_neurons)
> attr(,"class")
> [1] "silhouette"
>
> $`6`$summary
> Silhouette of 64 units in 6 clusters from silhouette.default(x = cluster_numbers, dist = dist_neurons) :
> Cluster sizes and average silhouette widths:
> 14 2 20 10 13 5
> 0.3397944 0.5572279 0.4551023 0.4860158 0.2530056 0.4197532
> Individual silhouette widths:
> Min. 1st Qu. Median Mean 3rd Qu. Max.
> -0.1954 0.2991 0.4536 0.3941 0.5535 0.6632
>
>
> $`7`
> $`7`$cluster_number
> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
> 1 1 1 2 3 3 3 3 1 1 1 1 2 3 3 3 1 1 1 4
> V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40
> 4 3 3 3 1 5 1 4 4 4 3 3 1 5 5 4 4 4 6 3
> V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60
> 1 5 5 5 4 4 6 6 5 5 5 7 7 6 6 6 5 5 5 5
> V61 V62 V63 V64
> 7 7 7 6
>
> $`7`$silhouette
> cluster neighbor sil_width
> [1,] 1 5 0.486235062
> [2,] 1 5 0.511235645
> [3,] 1 5 0.315222893
> [4,] 2 4 0.577819344
> [5,] 3 6 0.480505999
> [6,] 3 6 0.671251104
> [7,] 3 6 0.617014733
> [8,] 3 6 0.571681721
> [9,] 1 5 0.500336364
> [10,] 1 5 0.507099232
> [11,] 1 5 0.391737085
> [12,] 1 5 0.202153584
> [13,] 2 3 0.522584038
> [14,] 3 6 0.651564195
> [15,] 3 6 0.668018377
> [16,] 3 6 0.656797552
> [17,] 1 5 0.495373156
> [18,] 1 5 0.494839374
> [19,] 1 5 0.330390367
> [20,] 4 5 0.254844563
> [21,] 4 6 0.001239274
> [22,] 3 6 0.585834033
> [23,] 3 6 0.646207181
> [24,] 3 6 0.445370847
> [25,] 1 5 0.405759638
> [26,] 5 1 -0.195429102
> [27,] 1 5 -0.142191986
> [28,] 4 5 0.500667584
> [29,] 4 6 0.611496445
> [30,] 4 6 0.322847773
> [31,] 3 6 0.206281855
> [32,] 3 6 0.157084470
> [33,] 1 5 0.225636531
> [34,] 5 1 0.346810891
> [35,] 5 4 -0.147697307
> [36,] 4 5 0.444685227
> [37,] 4 6 0.590226121
> [38,] 4 6 -0.136252415
> [39,] 6 3 0.481046113
> [40,] 3 6 -0.173342891
> [41,] 1 5 0.033294422
> [42,] 5 1 0.473719132
> [43,] 5 4 0.370759213
> [44,] 5 4 0.139238135
> [45,] 4 7 0.584675904
> [46,] 4 6 0.365123699
> [47,] 6 4 0.524748624
> [48,] 6 3 0.512166092
> [49,] 5 1 0.258200385
> [50,] 5 4 0.547527170
> [51,] 5 7 0.232436857
> [52,] 7 5 0.328732443
> [53,] 7 6 0.426985839
> [54,] 6 7 0.428874578
> [55,] 6 4 0.592362505
> [56,] 6 3 0.550525442
> [57,] 5 1 0.364911542
> [58,] 5 1 0.528272686
> [59,] 5 1 0.290357292
> [60,] 5 7 0.079965966
> [61,] 7 5 0.428763849
> [62,] 7 6 0.517197781
> [63,] 7 6 -0.053802126
> [64,] 6 7 0.582440686
> attr(,"Ordered")
> [1] FALSE
> attr(,"call")
> silhouette.default(x = cluster_numbers, dist = dist_neurons)
> attr(,"class")
> [1] "silhouette"
>
> $`7`$summary
> Silhouette of 64 units in 7 clusters from silhouette.default(x = cluster_numbers, dist = dist_neurons) :
> Cluster sizes and average silhouette widths:
> 14 2 13 10 13 7 5
> 0.3397944 0.5502017 0.4757130 0.3539554 0.2530056 0.5245949 0.3295756
> Individual silhouette widths:
> Min. 1st Qu. Median Mean 3rd Qu. Max.
> -0.1954 0.2574 0.4450 0.3780 0.5331 0.6713
>
>
> $`8`
> $`8`$cluster_number
> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
> 1 1 2 3 4 4 4 4 1 1 2 2 3 4 4 4 1 1 2 5
> V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40
> 5 4 4 4 1 6 2 5 5 5 4 4 1 6 6 5 5 5 7 4
> V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60
> 1 6 6 6 5 5 7 7 6 6 6 8 8 7 7 7 6 6 6 6
> V61 V62 V63 V64
> 8 8 8 7
>
> $`8`$silhouette
> cluster neighbor sil_width
> [1,] 1 2 0.548399290
> [2,] 1 2 0.508211444
> [3,] 2 1 0.482695637
> [4,] 3 2 0.486012596
> [5,] 4 7 0.480505999
> [6,] 4 7 0.671251104
> [7,] 4 7 0.617014733
> [8,] 4 7 0.571681721
> [9,] 1 2 0.614348771
> [10,] 1 2 0.640690523
> [11,] 2 1 0.194573104
> [12,] 2 1 0.542185027
> [13,] 3 4 0.522584038
> [14,] 4 7 0.651564195
> [15,] 4 7 0.668018377
> [16,] 4 7 0.656797552
> [17,] 1 2 0.606432781
> [18,] 1 2 0.461347781
> [19,] 2 1 0.534159703
> [20,] 5 2 0.033384271
> [21,] 5 7 0.001239274
> [22,] 4 7 0.585834033
> [23,] 4 7 0.646207181
> [24,] 4 7 0.445370847
> [25,] 1 2 0.584368411
> [26,] 6 1 -0.326686254
> [27,] 2 6 -0.099906329
> [28,] 5 6 0.500667584
> [29,] 5 7 0.611496445
> [30,] 5 7 0.322847773
> [31,] 4 7 0.206281855
> [32,] 4 7 0.157084470
> [33,] 1 6 0.434192547
> [34,] 6 1 0.329290373
> [35,] 6 5 -0.147697307
> [36,] 5 6 0.444685227
> [37,] 5 7 0.590226121
> [38,] 5 7 -0.136252415
> [39,] 7 4 0.481046113
> [40,] 4 7 -0.173342891
> [41,] 1 6 0.234783841
> [42,] 6 1 0.457852127
> [43,] 6 5 0.370759213
> [44,] 6 5 0.139238135
> [45,] 5 8 0.584675904
> [46,] 5 7 0.365123699
> [47,] 7 5 0.524748624
> [48,] 7 4 0.512166092
> [49,] 6 1 0.163534429
> [50,] 6 2 0.546372632
> [51,] 6 8 0.232436857
> [52,] 8 6 0.328732443
> [53,] 8 7 0.426985839
> [54,] 7 8 0.428874578
> [55,] 7 5 0.592362505
> [56,] 7 4 0.550525442
> [57,] 6 1 0.300245589
> [58,] 6 1 0.515387165
> [59,] 6 1 0.266653108
> [60,] 6 8 0.079965966
> [61,] 8 6 0.428763849
> [62,] 8 7 0.517197781
> [63,] 8 7 -0.053802126
> [64,] 7 8 0.582440686
> attr(,"Ordered")
> [1] FALSE
> attr(,"call")
> silhouette.default(x = cluster_numbers, dist = dist_neurons)
> attr(,"class")
> [1] "silhouette"
>
> $`8`$summary
> Silhouette of 64 units in 8 clusters from silhouette.default(x = cluster_numbers, dist = dist_neurons) :
> Cluster sizes and average silhouette widths:
> 9 5 2 13 10 13 7 5
> 0.5147528 0.3307414 0.5042983 0.4757130 0.3318094 0.2251809 0.5245949 0.3295756
> Individual silhouette widths:
> Min. 1st Qu. Median Mean 3rd Qu. Max.
> -0.3267 0.2587 0.4808 0.3913 0.5744 0.6713
>
>
> $`9`
> $`9`$cluster_number
> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
> 1 1 2 3 4 4 4 4 1 1 2 2 3 4 4 4 1 1 2 5
> V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40
> 5 4 4 4 1 6 2 5 5 5 4 4 1 6 6 5 5 5 7 4
> V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60
> 1 6 6 6 5 5 7 7 6 6 6 8 8 7 7 7 6 6 6 9
> V61 V62 V63 V64
> 8 8 8 7
>
> $`9`$silhouette
> cluster neighbor sil_width
> [1,] 1 2 0.548399290
> [2,] 1 2 0.508211444
> [3,] 2 1 0.482695637
> [4,] 3 2 0.486012596
> [5,] 4 7 0.480505999
> [6,] 4 7 0.671251104
> [7,] 4 7 0.617014733
> [8,] 4 7 0.571681721
> [9,] 1 2 0.614348771
> [10,] 1 2 0.640690523
> [11,] 2 1 0.194573104
> [12,] 2 1 0.542185027
> [13,] 3 4 0.522584038
> [14,] 4 7 0.651564195
> [15,] 4 7 0.668018377
> [16,] 4 7 0.656797552
> [17,] 1 2 0.606432781
> [18,] 1 2 0.461347781
> [19,] 2 1 0.534159703
> [20,] 5 2 0.033384271
> [21,] 5 7 0.001239274
> [22,] 4 7 0.585834033
> [23,] 4 7 0.646207181
> [24,] 4 7 0.445370847
> [25,] 1 2 0.584368411
> [26,] 6 1 -0.323051575
> [27,] 2 6 -0.132715203
> [28,] 5 6 0.491137574
> [29,] 5 7 0.611496445
> [30,] 5 7 0.322847773
> [31,] 4 7 0.206281855
> [32,] 4 7 0.157084470
> [33,] 1 6 0.414988691
> [34,] 6 1 0.367128406
> [35,] 6 5 -0.119811884
> [36,] 5 6 0.431913158
> [37,] 5 7 0.590226121
> [38,] 5 7 -0.136252415
> [39,] 7 4 0.481046113
> [40,] 4 7 -0.173342891
> [41,] 1 6 0.212594083
> [42,] 6 9 0.465248504
> [43,] 6 5 0.404550016
> [44,] 6 5 0.173918425
> [45,] 5 8 0.584675904
> [46,] 5 7 0.365123699
> [47,] 7 5 0.524748624
> [48,] 7 4 0.512166092
> [49,] 6 1 0.207476335
> [50,] 6 9 0.448509832
> [51,] 6 9 0.239971073
> [52,] 8 9 0.251639032
> [53,] 8 7 0.426985839
> [54,] 7 8 0.428874578
> [55,] 7 5 0.592362505
> [56,] 7 4 0.550525442
> [57,] 6 1 0.331454897
> [58,] 6 9 0.427547051
> [59,] 6 9 -0.178398913
> [60,] 9 6 0.000000000
> [61,] 8 9 -0.062464438
> [62,] 8 9 0.450767787
> [63,] 8 7 -0.053802126
> [64,] 7 8 0.582440686
> attr(,"Ordered")
> [1] FALSE
> attr(,"call")
> silhouette.default(x = cluster_numbers, dist = dist_neurons)
> attr(,"class")
> [1] "silhouette"
>
> $`9`$summary
> Silhouette of 64 units in 9 clusters from silhouette.default(x = cluster_numbers, dist = dist_neurons) :
> Cluster sizes and average silhouette widths:
> 9 5 2 13 10 12 7 5
> 0.5101535 0.3241797 0.5042983 0.4757130 0.3295792 0.2037118 0.5245949 0.2026252
> 1
> 0.0000000
> Individual silhouette widths:
> Min. 1st Qu. Median Mean 3rd Qu. Max.
> -0.3231 0.2113 0.4561 0.3724 0.5744 0.6713
# Same evaluation for the distance-matrix SOM; the call prints the average
# silhouette score per cluster count, followed by the details.
cluster_neuron(som_dm_trj, max_cluster = 9)
> NOTE: the method used to generate distance matrix was euclidean,
> however, if other method are considered, replace kohonen object with dist object or
> dist(<som object>$codes[[1]] , <chosen method>)
> # A tibble: 4 × 2
> `Number of clusters` `silhouette score \n average`
> <int> <dbl>
> 1 6 0.475
> 2 7 0.479
> 3 8 0.477
> 4 9 0.464
> $`6`
> $`6`$cluster_number
> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
> 1 1 2 2 3 4 5 5 1 1 2 2 2 3 4 4 1 1 1 2
> V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40
> 3 4 4 4 6 6 1 1 2 3 4 4 6 6 1 1 2 3 3 3
> V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60
> 6 6 6 1 1 2 2 3 6 6 6 1 2 2 2 2 6 6 6 6
> V61 V62 V63 V64
> 1 2 2 2
>
> $`6`$silhouette
> cluster neighbor sil_width
> [1,] 1 6 0.67891711
> [2,] 1 2 0.28976823
> [3,] 2 1 0.46363984
> [4,] 2 3 0.56655136
> [5,] 3 4 0.74370785
> [6,] 4 3 0.48983413
> [7,] 5 4 0.67948424
> [8,] 5 4 0.75727987
> [9,] 1 6 0.61566406
> [10,] 1 6 0.65682672
> [11,] 2 1 -0.20103349
> [12,] 2 1 0.59989654
> [13,] 2 3 -0.34401946
> [14,] 3 4 0.67014967
> [15,] 4 5 0.60720476
> [16,] 4 5 -0.47669502
> [17,] 1 6 0.34460958
> [18,] 1 6 0.46589826
> [19,] 1 2 0.43146594
> [20,] 2 3 0.63006949
> [21,] 3 2 0.44861029
> [22,] 4 3 -0.12097151
> [23,] 4 5 0.45138655
> [24,] 4 5 0.08108510
> [25,] 6 1 0.38466593
> [26,] 6 1 0.53537557
> [27,] 1 6 -0.06109653
> [28,] 1 2 0.63008412
> [29,] 2 3 0.67312400
> [30,] 3 2 0.76693783
> [31,] 4 3 0.26310006
> [32,] 4 3 0.61443242
> [33,] 6 1 0.78841028
> [34,] 6 1 0.70999933
> [35,] 1 6 -0.27918864
> [36,] 1 6 0.68390866
> [37,] 2 1 0.64112497
> [38,] 3 2 0.60790672
> [39,] 3 4 0.53043457
> [40,] 3 4 0.29310725
> [41,] 6 1 0.81149921
> [42,] 6 1 0.81544321
> [43,] 6 1 0.80744531
> [44,] 1 6 0.18258600
> [45,] 1 2 0.54614510
> [46,] 2 1 0.54239764
> [47,] 2 3 -0.11539387
> [48,] 3 2 0.71013772
> [49,] 6 1 0.78576756
> [50,] 6 1 0.80094174
> [51,] 6 1 0.75709753
> [52,] 1 6 0.55285399
> [53,] 2 1 0.01416095
> [54,] 2 1 0.35601797
> [55,] 2 3 0.34803812
> [56,] 2 3 0.16975398
> [57,] 6 1 0.73745139
> [58,] 6 1 0.76163073
> [59,] 6 1 0.81681698
> [60,] 6 1 0.64257602
> [61,] 1 6 0.68691499
> [62,] 2 1 0.21110197
> [63,] 2 1 0.67150588
> [64,] 2 3 0.47580825
> attr(,"Ordered")
> [1] FALSE
> attr(,"call")
> silhouette.default(x = cluster_numbers, dist = dist_neurons)
> attr(,"class")
> [1] "silhouette"
>
> $`6`$summary
> Silhouette of 64 units in 6 clusters from silhouette.default(x = cluster_numbers, dist = dist_neurons) :
> Cluster sizes and average silhouette widths:
> 15 17 8 8 2 14
> 0.4283572 0.3354555 0.5963740 0.2386721 0.7183821 0.7253658
> Individual silhouette widths:
> Min. 1st Qu. Median Mean 3rd Qu. Max.
> -0.4767 0.3472 0.5832 0.4750 0.6847 0.8168
>
>
> $`7`
> $`7`$cluster_number
> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
> 1 1 2 3 4 5 6 6 1 1 2 2 3 4 5 5 1 1 1 3
> V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40
> 4 5 5 5 7 7 1 1 2 4 5 5 7 7 1 1 2 4 4 4
> V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60
> 7 7 7 1 1 2 3 4 7 7 7 1 2 2 3 3 7 7 7 7
> V61 V62 V63 V64
> 1 2 2 3
>
> $`7`$silhouette
> cluster neighbor sil_width
> [1,] 1 2 0.67486044
> [2,] 1 2 -0.18395233
> [3,] 2 3 0.72437974
> [4,] 3 2 0.52055806
> [5,] 4 3 0.68169763
> [6,] 5 4 0.48983413
> [7,] 6 5 0.67948424
> [8,] 6 5 0.75727987
> [9,] 1 7 0.61566406
> [10,] 1 7 0.65682672
> [11,] 2 1 0.30295193
> [12,] 2 3 0.63121539
> [13,] 3 4 0.31696767
> [14,] 4 5 0.67014967
> [15,] 5 6 0.60720476
> [16,] 5 6 -0.47669502
> [17,] 1 7 0.34460958
> [18,] 1 7 0.46589826
> [19,] 1 2 0.07695615
> [20,] 3 2 0.24629146
> [21,] 4 3 -0.12425176
> [22,] 5 4 -0.12097151
> [23,] 5 6 0.45138655
> [24,] 5 6 0.08108510
> [25,] 7 1 0.38466593
> [26,] 7 1 0.53537557
> [27,] 1 7 -0.06109653
> [28,] 1 2 0.45227113
> [29,] 2 3 -0.03570123
> [30,] 4 3 0.63034286
> [31,] 5 4 0.26310006
> [32,] 5 4 0.61443242
> [33,] 7 1 0.78841028
> [34,] 7 1 0.70999933
> [35,] 1 7 -0.27918864
> [36,] 1 2 0.55680851
> [37,] 2 3 0.51860172
> [38,] 4 3 0.27572791
> [39,] 4 5 0.53043457
> [40,] 4 5 0.29310725
> [41,] 7 1 0.81149921
> [42,] 7 1 0.81544321
> [43,] 7 1 0.80744531
> [44,] 1 7 0.18258600
> [45,] 1 2 0.30010034
> [46,] 2 3 0.69470408
> [47,] 3 4 0.55963495
> [48,] 4 3 0.50872796
> [49,] 7 1 0.78576756
> [50,] 7 1 0.80094174
> [51,] 7 1 0.75709753
> [52,] 1 7 0.55285399
> [53,] 2 1 0.49549850
> [54,] 2 1 0.70431105
> [55,] 3 2 0.73200315
> [56,] 3 4 0.70492877
> [57,] 7 1 0.73745139
> [58,] 7 1 0.76163073
> [59,] 7 1 0.81681698
> [60,] 7 1 0.64257602
> [61,] 1 2 0.62669339
> [62,] 2 1 0.62375632
> [63,] 2 3 0.31065073
> [64,] 3 2 0.66657112
> attr(,"Ordered")
> [1] FALSE
> attr(,"call")
> silhouette.default(x = cluster_numbers, dist = dist_neurons)
> attr(,"class")
> [1] "silhouette"
>
> $`7`$summary
> Silhouette of 64 units in 7 clusters from silhouette.default(x = cluster_numbers, dist = dist_neurons) :
> Cluster sizes and average silhouette widths:
> 15 10 7 8 8 2 14
> 0.3321261 0.4970368 0.5352793 0.4332420 0.2386721 0.7183821 0.7253658
> Individual silhouette widths:
> Min. 1st Qu. Median Mean 3rd Qu. Max.
> -0.4767 0.3087 0.5582 0.4792 0.6971 0.8168
>
>
> $`8`
> $`8`$cluster_number
> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
> 1 1 2 3 4 5 6 6 1 1 2 2 3 4 7 7 1 1 1 3
> V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40
> 4 5 7 7 8 8 1 1 2 4 5 5 8 8 1 1 2 4 4 4
> V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60
> 8 8 8 1 1 2 3 4 8 8 8 1 2 2 3 3 8 8 8 8
> V61 V62 V63 V64
> 1 2 2 3
>
> $`8`$silhouette
> cluster neighbor sil_width
> [1,] 1 2 0.674860438
> [2,] 1 2 -0.183952332
> [3,] 2 3 0.724379741
> [4,] 3 2 0.520558064
> [5,] 4 5 0.624436468
> [6,] 5 7 0.675917767
> [7,] 6 7 0.482562903
> [8,] 6 7 0.659005900
> [9,] 1 8 0.615664055
> [10,] 1 8 0.656826716
> [11,] 2 1 0.302951931
> [12,] 2 3 0.631215388
> [13,] 3 4 0.316967670
> [14,] 4 5 0.484060686
> [15,] 7 5 -0.007469261
> [16,] 7 6 -0.057331737
> [17,] 1 8 0.344609575
> [18,] 1 8 0.465898257
> [19,] 1 2 0.076956152
> [20,] 3 2 0.246291456
> [21,] 4 3 -0.124251757
> [22,] 5 4 0.480074707
> [23,] 7 5 0.533154198
> [24,] 7 6 0.558273937
> [25,] 8 1 0.384665926
> [26,] 8 1 0.535375567
> [27,] 1 8 -0.061096530
> [28,] 1 2 0.452271131
> [29,] 2 3 -0.035701235
> [30,] 4 3 0.630342861
> [31,] 5 4 0.718444281
> [32,] 5 7 0.330643322
> [33,] 8 1 0.788410276
> [34,] 8 1 0.709999327
> [35,] 1 8 -0.279188635
> [36,] 1 2 0.556808510
> [37,] 2 3 0.518601716
> [38,] 4 3 0.275727908
> [39,] 4 5 0.195133547
> [40,] 4 5 -0.280108520
> [41,] 8 1 0.811499212
> [42,] 8 1 0.815443211
> [43,] 8 1 0.807445305
> [44,] 1 8 0.182586004
> [45,] 1 2 0.300100339
> [46,] 2 3 0.694704083
> [47,] 3 4 0.559634949
> [48,] 4 3 0.508727964
> [49,] 8 1 0.785767563
> [50,] 8 1 0.800941737
> [51,] 8 1 0.757097533
> [52,] 1 8 0.552853989
> [53,] 2 1 0.495498499
> [54,] 2 1 0.704311047
> [55,] 3 2 0.732003152
> [56,] 3 4 0.704928770
> [57,] 8 1 0.737451391
> [58,] 8 1 0.761630726
> [59,] 8 1 0.816816978
> [60,] 8 1 0.642576016
> [61,] 1 2 0.626693394
> [62,] 2 1 0.623756321
> [63,] 2 3 0.310650730
> [64,] 3 2 0.666571116
> attr(,"Ordered")
> [1] FALSE
> attr(,"call")
> silhouette.default(x = cluster_numbers, dist = dist_neurons)
> attr(,"class")
> [1] "silhouette"
>
> $`8`$summary
> Silhouette of 64 units in 8 clusters from silhouette.default(x = cluster_numbers, dist = dist_neurons) :
> Cluster sizes and average silhouette widths:
> 15 10 7 8 4 2 4 14
> 0.3321261 0.4970368 0.5352793 0.2892586 0.5512700 0.5707844 0.2566568 0.7253658
> Individual silhouette widths:
> Min. 1st Qu. Median Mean 3rd Qu. Max.
> -0.2801 0.3154 0.5548 0.4772 0.6971 0.8168
>
>
> $`9`
> $`9`$cluster_number
> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
> 1 1 2 3 4 5 6 6 7 1 2 2 3 4 8 8 7 7 1 3
> V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40
> 4 5 8 8 9 9 7 1 2 4 5 5 9 9 7 1 2 4 4 4
> V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60
> 9 9 9 7 1 2 3 4 9 9 9 7 2 2 3 3 9 9 9 9
> V61 V62 V63 V64
> 1 2 2 3
>
> $`9`$silhouette
> cluster neighbor sil_width
> [1,] 1 7 0.332532791
> [2,] 1 2 0.330078871
> [3,] 2 1 0.657297965
> [4,] 3 2 0.520558064
> [5,] 4 5 0.624436468
> [6,] 5 8 0.675917767
> [7,] 6 8 0.482562903
> [8,] 6 8 0.659005900
> [9,] 7 1 0.301478405
> [10,] 1 7 -0.070106266
> [11,] 2 1 -0.172570611
> [12,] 2 3 0.631215388
> [13,] 3 4 0.316967670
> [14,] 4 5 0.484060686
> [15,] 8 5 -0.007469261
> [16,] 8 6 -0.057331737
> [17,] 7 1 0.759662889
> [18,] 7 1 0.703796134
> [19,] 1 2 0.544446442
> [20,] 3 2 0.246291456
> [21,] 4 3 -0.124251757
> [22,] 5 4 0.480074707
> [23,] 8 5 0.533154198
> [24,] 8 6 0.558273937
> [25,] 9 7 -0.239052505
> [26,] 9 7 0.107059555
> [27,] 7 9 0.595461294
> [28,] 1 7 0.700252468
> [29,] 2 3 -0.035701235
> [30,] 4 3 0.630342861
> [31,] 5 4 0.718444281
> [32,] 5 8 0.330643322
> [33,] 9 7 0.669204075
> [34,] 9 7 0.510433483
> [35,] 7 9 0.395371196
> [36,] 1 7 0.654866425
> [37,] 2 3 0.518601716
> [38,] 4 3 0.275727908
> [39,] 4 5 0.195133547
> [40,] 4 5 -0.280108520
> [41,] 9 7 0.730944007
> [42,] 9 7 0.725605776
> [43,] 9 7 0.707545456
> [44,] 7 9 0.716499807
> [45,] 1 2 0.678558513
> [46,] 2 1 0.694250988
> [47,] 3 4 0.559634949
> [48,] 4 3 0.508727964
> [49,] 9 7 0.703553401
> [50,] 9 7 0.720493424
> [51,] 9 7 0.606895674
> [52,] 7 1 0.572707019
> [53,] 2 1 0.196836954
> [54,] 2 1 0.578411605
> [55,] 3 2 0.732003152
> [56,] 3 4 0.704928770
> [57,] 9 7 0.647157921
> [58,] 9 7 0.675568460
> [59,] 9 7 0.733452248
> [60,] 9 7 0.364020698
> [61,] 1 7 0.547716423
> [62,] 2 1 0.437490502
> [63,] 2 3 0.310650730
> [64,] 3 2 0.666571116
> attr(,"Ordered")
> [1] FALSE
> attr(,"call")
> silhouette.default(x = cluster_numbers, dist = dist_neurons)
> attr(,"class")
> [1] "silhouette"
>
> $`9`$summary
> Silhouette of 64 units in 9 clusters from silhouette.default(x = cluster_numbers, dist = dist_neurons) :
> Cluster sizes and average silhouette widths:
> 8 10 7 8 4 2 7 4
> 0.4647932 0.3816484 0.5352793 0.2892586 0.5512700 0.5707844 0.5778538 0.2566568
> 14
> 0.5473487
> Individual silhouette widths:
> Min. 1st Qu. Median Mean 3rd Qu. Max.
> -0.2801 0.3268 0.5530 0.4637 0.6757 0.7597
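After checking the results, 6 clusters is taken as the optimal number: it has the highest average silhouette score for the pre-aligned SOM (0.394), and for the distance-matrix SOM it is within 0.004 of the best score (0.475, versus 0.479 for 7 clusters).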
Plot the clusters with boundaries REQ-8
# Fix the RNG seed once before the two k-means based plots.
set.seed(100)
# Pre-alignment SOM: k-means ("Hartigan-Wong") with 6 clusters.
plot_clustered_map(
  som_pre_trj,
  cluster_method = "kmeans",
  clustering_parameter = c(6, "Hartigan-Wong"),
  shape = "straight"
)
# Distance-matrix SOM: same clustering settings.
plot_clustered_map(
  som_dm_trj,
  cluster_method = "kmeans",
  clustering_parameter = c(6, "Hartigan-Wong"),
  shape = "straight"
)
Draw an arbitrary path REQ-9
# Hand-picked sequence passed to add_evolution_trace() (presumably neuron
# indices on the 8 x 8 map -- TODO confirm).
arbitrary_path <- c(9, 56, 44, 33, 22)
# Pre-alignment SOM: draw the clustered map, then overlay the path.
plot_clustered_map(
  som_pre_trj,
  cluster_method = "kmeans",
  clustering_parameter = c(6, "Hartigan-Wong"),
  shape = "straight"
)
add_evolution_trace(som_pre_trj, arbitrary_path)
# Distance-matrix SOM: same map settings and the same path.
plot_clustered_map(
  som_dm_trj,
  cluster_method = "kmeans",
  clustering_parameter = c(6, "Hartigan-Wong"),
  shape = "straight"
)
add_evolution_trace(som_dm_trj, arbitrary_path)
Add the population of each neuron to the map as a shape
# Pre-alignment SOM: clustered map with the neuron populations drawn on top
# using the "round" shape.
plot_clustered_map(
  som_pre_trj,
  cluster_method = "kmeans",
  clustering_parameter = c(6, "Hartigan-Wong"),
  shape = "straight"
)
add_neuron_population(som_pre_trj, shape = "round")
# Distance-matrix SOM: same map and overlay settings.
plot_clustered_map(
  som_dm_trj,
  cluster_method = "kmeans",
  clustering_parameter = c(6, "Hartigan-Wong"),
  shape = "straight"
)
add_neuron_population(som_dm_trj, shape = "round")
Add the population of each neuron to the map as a number
# Pre-alignment SOM: clustered map with the neuron populations added with
# text = TRUE.
plot_clustered_map(
  som_pre_trj,
  cluster_method = "kmeans",
  clustering_parameter = c(6, "Hartigan-Wong"),
  shape = "straight"
)
add_neuron_population(som_pre_trj, text = TRUE, shape = "round")
# Distance-matrix SOM: same map and overlay settings.
plot_clustered_map(
  som_dm_trj,
  cluster_method = "kmeans",
  clustering_parameter = c(6, "Hartigan-Wong"),
  shape = "straight"
)
add_neuron_population(som_dm_trj, text = TRUE, shape = "round")
plot an external property of trajectories
## No external per-neuron property is available for these trajectories, so a
## dummy one is generated randomly.
set.seed(100)
# k-means on the codebook vectors of the pre-alignment SOM, with 6 clusters.
som_pre_k <- kmeans(som_pre_trj$codes[[1]], centers = 6)
# One dummy value per neuron. The count is taken from the codebook matrix
# (one row per unit) instead of hard-coding 64, so the vector keeps matching
# the map if the SOM grid size is changed.
dummy_property_nuerons <- rnorm(
  nrow(som_pre_trj$codes[[1]]),
  mean = 8,
  sd = 4
)
# Pre-alignment SOM: colour the map by the dummy property using a
# blue-white-red palette.
plot_property(
  som_pre_trj,
  property = dummy_property_nuerons,
  shape = "straight",
  palette_name = colorRampPalette(c("blue", "white", "red"))
)
# Highlight cluster 3 of the k-means result with a label; property_value is
# left unset in this call.
highlight_a_cluster(
  som_pre_trj,
  som_pre_k$cluster,
  cluster_number = 3,
  label = "cluster 3",
  property_color = NULL,
  label_color = "darkgreen",
  col = "green",
  lwd = 2,
  cex = 2
)
# Second highlight on the same pre-alignment map (cluster 5). This call still
# uses som_pre_trj / som_pre_k, not the distance-matrix SOM. label,
# property_value, label_color, lwd and cex are left unset. The argument list
# ends at `col` with no trailing comma, so no empty argument is passed.
highlight_a_cluster(
  som_pre_trj,
  som_pre_k$cluster,
  cluster_number = 5,
  property_color = NULL,
  col = "green"
)
# Redraw the property map of the pre-alignment SOM, then highlight two
# clusters with heavier outlines.
plot_property(
  som_pre_trj,
  property = dummy_property_nuerons,
  shape = "straight",
  palette_name = colorRampPalette(c("blue", "white", "red"))
)
# Cluster 3: labelled, orange, line width 5; property_value is left unset.
highlight_a_cluster(
  som_pre_trj,
  som_pre_k$cluster,
  cluster_number = 3,
  label = "important cluster",
  property_color = NULL,
  label_color = "darkgreen",
  col = "orange",
  lwd = 5,
  cex = 2
)
# Cluster 5: no label is passed; property_value "66" is supplied instead.
highlight_a_cluster(
  som_pre_trj,
  som_pre_k$cluster,
  cluster_number = 5,
  property_value = "66",
  property_color = NULL,
  label_color = "darkgreen",
  col = "yellow",
  lwd = 5,
  cex = 1.2
)